Vignettes:
N.B. delta=0.25, p.threshold=0.0001
library(DSS)
## Loading required package: Biobase
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: BiocParallel
## Loading required package: bsseq
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
##
## anyMissing, rowMedians
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
##
## rowMedians
## Loading required package: parallel
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:IRanges':
##
## slice
## The following object is masked from 'package:S4Vectors':
##
## rename
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(scales)
source("../Methylation_helper.R")InputFolder <- params$InputFolder
OutputFolder <- params$OutputFolderLoading of bsseq object with CpGs shared by at least 75% of samples.
bsseq_obj <- readRDS("~/DataDir/2.DifferentialAnalysis/Input/bsseq_obj_sharedby75ofall.rds") #getting bsseq object where sample selection and CpG filtering were already performed PCA plot
This PCA is done on the new filtered BSseq object (different from the one in the preliminary exploration script!)
cellTypes_colors <- c(hiPSCs ="#dd1c77", iMeLCs ="#377eb8", hPGCLCs ="#4daf4a", hEGCLCs = "#ff7f00")pca_res <- do_PCA(getMeth(BSseq = bsseq_obj, type = "raw"))
plot_PCA(pca_res = pca_res, anno = pData(bsseq_obj), col_anno = "Type", shape_anno = NULL, custom_colors = cellTypes_colors, point_size = 5)plot_PCA(pca_res = pca_res, anno = pData(bsseq_obj), col_anno = "Type", shape_anno = NULL, pc_x = "PC2", pc_y = "PC3", custom_colors = cellTypes_colors, point_size = 5)Loading results obtained with DSS::DMLtest function
dml_tests_files <- list.files(path = InputFolder, pattern = '*\\.rds', full.names = TRUE)
dml_tests_files <- dml_tests_files[-1]Storing DMRs and DMLs Plotting number of DMRs
nr_tests <- length(dml_tests_files)
DM_tests_Res <- list()
DMR_Res <- list()
DML_Res <- list()
for(i in 1:nr_tests){
DM_tests_Res[[i]] <- readRDS(dml_tests_files[i])
DMR_Res[[i]] <- list()
DML_Res[[i]] <- list()
names(DM_tests_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
names(DMR_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
names(DML_Res)[i] <- sub(".rds", "", basename(dml_tests_files))[i]
DMR_Res[[i]] <- callDMR(DM_tests_Res[[i]], delta=0.25, p.threshold=0.0001)
DML_Res[[i]] <- callDML(DM_tests_Res[[i]], delta=0.25, p.threshold=0.0001)
}DMRbarplot(DMR=DMR_Res, comparison=1, y_lim = c(0,2400), interactive=FALSE)DMRbarplot_length(DMR=DMR_Res, comparison=1)DMRbarplot(DMR=DMR_Res, comparison=3, y_lim = c(0,2400), interactive=FALSE)DMRbarplot_length(DMR=DMR_Res, comparison=3)DMRbarplot(DMR=DMR_Res, comparison=4, y_lim = c(0,2400), interactive=FALSE)DMRbarplot_length(DMR=DMR_Res, comparison=4)DMRbarplot(DMR=DMR_Res, comparison=5, y_lim = c(0,2400), interactive=FALSE)DMRbarplot_length(DMR=DMR_Res, comparison=5)DMRbarplot(DMR=DMR_Res, comparison=6, y_lim = c(0,2400), interactive=FALSE)DMRbarplot_length(DMR=DMR_Res, comparison=6)DMRbarplot(DMR=DMR_Res, comparison=7, y_lim = c(0,2400), interactive=FALSE)DMRbarplot_length(DMR=DMR_Res, comparison=7)Barplot with all together
comparisons <- c("hiPSCsvsiMeLCs", "iMeLCsvshPGCLCs", "hiPSCsvshPGCLCs", "hEGCLCsvshPGCLCs", "hEGCLCsvsiMeLCs", "hEGCLCsvshiPSCs") #ordered with criteria
new_ylabel <- c("hiPSCsvsiMeLCs" = "iMeLCs vs hiPSCs",
"iMeLCsvshPGCLCs" = "hPGCLCs vs iMeLCs",
"hiPSCsvshPGCLCs" = "hPGCLCs vs hiPSCs",
"hEGCLCsvshPGCLCs" = "hPGCLCs vs hEGCLCs",
"hEGCLCsvsiMeLCs" = "iMeLCs vs hEGCLCs",
"hEGCLCsvshiPSCs" = "hiPSCs vs hEGCLCs")
DMRbarplot_all(DMR=DMR_Res, comparisons_ordered=comparisons, label_y=new_ylabel, y_lim = 2400, interactive=FALSE)N.B: Q: Why doesn’t callDMR function return FDR for identified DMRs?
A: In DMR calling, the statistical test is perform for each CpG, and then the significant CpG are merged into regions. It is very difficult to estimate FDR at the region-level, based on the site level test results (p-values or FDR). This is a difficult question for all types of genome-wide assays such as the ChIP-seq. So I rather not to report a DMR-level FDR if it’s not accurate.
saveRDS(DM_tests_Res, paste0(InputFolder, '/', 'DML_test_results.rds'))
saveRDS(DMR_Res, paste0(OutputFolder, '/', 'DMRs.rds'))
saveRDS(DML_Res, paste0(OutputFolder, '/', 'DMLs.rds'))SessionInfo <- sessionInfo()
Date <- date()Date## [1] "Thu Jan 9 18:05:23 2025"
SessionInfo## R version 4.2.1 (2022-06-23)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] scales_1.2.1 plotly_4.10.2
## [3] ggplot2_3.4.2 DSS_2.46.0
## [5] bsseq_1.34.0 SummarizedExperiment_1.28.0
## [7] MatrixGenerics_1.10.0 matrixStats_1.0.0
## [9] GenomicRanges_1.50.2 GenomeInfoDb_1.34.9
## [11] IRanges_2.32.0 S4Vectors_0.36.2
## [13] BiocParallel_1.32.6 Biobase_2.58.0
## [15] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] httr_1.4.6 tidyr_1.3.0
## [3] sass_0.4.7 viridisLite_0.4.2
## [5] jsonlite_1.8.7 splines_4.2.1
## [7] DelayedMatrixStats_1.20.0 R.utils_2.12.2
## [9] gtools_3.9.4 bslib_0.5.0
## [11] highr_0.10 BSgenome_1.66.3
## [13] GenomeInfoDbData_1.2.9 Rsamtools_2.14.0
## [15] yaml_2.3.7 pillar_1.9.0
## [17] lattice_0.21-8 glue_1.6.2
## [19] limma_3.54.2 digest_0.6.33
## [21] RColorBrewer_1.1-3 XVector_0.38.0
## [23] colorspace_2.1-0 htmltools_0.5.5
## [25] Matrix_1.6-0 R.oo_1.25.0
## [27] XML_3.99-0.14 pkgconfig_2.0.3
## [29] zlibbioc_1.44.0 purrr_1.0.1
## [31] HDF5Array_1.26.0 tibble_3.2.1
## [33] farver_2.1.1 generics_0.1.3
## [35] ellipsis_0.3.2 withr_2.5.0
## [37] cachem_1.0.8 lazyeval_0.2.2
## [39] cli_3.6.1 magrittr_2.0.3
## [41] crayon_1.5.2 evaluate_0.21
## [43] R.methodsS3_1.8.2 fansi_1.0.4
## [45] tools_4.2.1 data.table_1.14.8
## [47] BiocIO_1.8.0 lifecycle_1.0.3
## [49] Rhdf5lib_1.20.0 munsell_0.5.0
## [51] locfit_1.5-9.7 DelayedArray_0.24.0
## [53] Biostrings_2.66.0 compiler_4.2.1
## [55] jquerylib_0.1.4 rlang_1.1.1
## [57] rhdf5_2.42.1 grid_4.2.1
## [59] RCurl_1.98-1.12 rhdf5filters_1.10.1
## [61] rstudioapi_0.15.0 htmlwidgets_1.6.2
## [63] rjson_0.2.21 crosstalk_1.2.0
## [65] labeling_0.4.2 bitops_1.0-7
## [67] rmarkdown_2.23 restfulr_0.0.15
## [69] gtable_0.3.3 codetools_0.2-19
## [71] R6_2.5.1 GenomicAlignments_1.34.1
## [73] knitr_1.43 dplyr_1.1.2
## [75] rtracklayer_1.58.0 fastmap_1.1.1
## [77] utf8_1.2.3 permute_0.9-7
## [79] Rcpp_1.0.11 vctrs_0.6.3
## [81] tidyselect_1.2.0 xfun_0.39
## [83] sparseMatrixStats_1.10.0